Submission by: Ashwanth Narlapally¶

UNEMPLOYMENT ANALYSIS WITH PYTHON¶

Data science¶

Task 2¶

In [1]:
# Import the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
In [2]:
# Load the unemployment dataset used by the rest of the notebook (one row per
# state and month-end date).
# The earlier read of "Unemployment in India.csv" was dropped: its result was
# overwritten on the very next line and never used, so it only added an
# unnecessary file dependency.
data = pd.read_csv("Unemployment_Rate_upto_11_2020 (1).csv")
data.head(5)
Out[2]:
Region Date Frequency Estimated Unemployment Rate (%) Estimated Employed Estimated Labour Participation Rate (%) Region.1 longitude latitude
0 Andhra Pradesh 31-01-2020 M 5.48 16635535 41.02 South 15.9129 79.74
1 Andhra Pradesh 29-02-2020 M 5.83 16545652 40.90 South 15.9129 79.74
2 Andhra Pradesh 31-03-2020 M 5.79 15881197 39.18 South 15.9129 79.74
3 Andhra Pradesh 30-04-2020 M 20.51 11336911 33.10 South 15.9129 79.74
4 Andhra Pradesh 31-05-2020 M 17.43 12988845 36.46 South 15.9129 79.74
In [3]:
# Count the missing values in every column to confirm the dataset is complete.
null_counts = data.isnull().sum()
null_counts
Out[3]:
Region                                      0
 Date                                       0
 Frequency                                  0
 Estimated Unemployment Rate (%)            0
 Estimated Employed                         0
 Estimated Labour Participation Rate (%)    0
Region.1                                    0
longitude                                   0
latitude                                    0
dtype: int64
In [4]:
# Replace the raw CSV headers with cleaner names: the first column ("Region"
# in the file) holds state names and becomes 'States', while "Region.1"
# becomes 'Region'. The assignment is positional, so the list must stay in the
# same order as the columns in the file.
# NOTE(review): the values under 'longitude' (~10-34) look like latitudes for
# India and those under 'latitude' (~71-93) look like longitudes, so the labels
# inherited from the CSV appear to be swapped - confirm before any geographic use.
column_names = [
    'States',
    'Date',
    'Frequency',
    'Estimated Unemployment Rate',
    'Estimated Employed',
    'Estimated Labour Participation Rate',
    'Region',
    'longitude',
    'latitude',
]
data.columns = column_names
data.head(5)
Out[4]:
States Date Frequency Estimated Unemployment Rate Estimated Employed Estimated Labour Participation Rate Region longitude latitude
0 Andhra Pradesh 31-01-2020 M 5.48 16635535 41.02 South 15.9129 79.74
1 Andhra Pradesh 29-02-2020 M 5.83 16545652 40.90 South 15.9129 79.74
2 Andhra Pradesh 31-03-2020 M 5.79 15881197 39.18 South 15.9129 79.74
3 Andhra Pradesh 30-04-2020 M 20.51 11336911 33.10 South 15.9129 79.74
4 Andhra Pradesh 31-05-2020 M 17.43 12988845 36.46 South 15.9129 79.74
In [5]:
# Dataset dimensions as (number of rows, number of columns).
n_rows, n_cols = data.shape
(n_rows, n_cols)
Out[5]:
(267, 9)
In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
summary_stats = data.describe()
summary_stats
Out[6]:
Estimated Unemployment Rate Estimated Employed Estimated Labour Participation Rate longitude latitude
count 267.000000 2.670000e+02 267.000000 267.000000 267.000000
mean 12.236929 1.396211e+07 41.681573 22.826048 80.532425
std 10.803283 1.336632e+07 7.845419 6.270731 5.831738
min 0.500000 1.175420e+05 16.770000 10.850500 71.192400
25% 4.845000 2.838930e+06 37.265000 18.112400 76.085600
50% 9.650000 9.732417e+06 40.390000 23.610200 79.019300
75% 16.755000 2.187869e+07 44.055000 27.278400 85.279900
max 75.850000 5.943376e+07 69.690000 33.778200 92.937600
In [7]:
# List the distinct states that appear in the dataset.
data["States"].unique()
Out[7]:
array(['Andhra Pradesh', 'Assam', 'Bihar', 'Chhattisgarh', 'Delhi', 'Goa',
       'Gujarat', 'Haryana', 'Himachal Pradesh', 'Jammu & Kashmir',
       'Jharkhand', 'Karnataka', 'Kerala', 'Madhya Pradesh',
       'Maharashtra', 'Meghalaya', 'Odisha', 'Puducherry', 'Punjab',
       'Rajasthan', 'Sikkim', 'Tamil Nadu', 'Telangana', 'Tripura',
       'Uttar Pradesh', 'Uttarakhand', 'West Bengal'], dtype=object)
In [8]:
# List the distinct regions that appear in the dataset.
data["Region"].unique()
Out[8]:
array(['South', 'Northeast', 'East', 'West', 'North'], dtype=object)
In [9]:
# Distribution of the estimated number of employed people, split by region.
# The column names are already set in the earlier renaming cell, so the
# copy-pasted reassignment of data.columns was removed. The title no longer
# calls the plotted head count a "rate".
employed_fig, employed_ax = plt.subplots(figsize=(7, 6))
sns.histplot(x="Estimated Employed", hue="Region", data=data, ax=employed_ax)
employed_ax.set_title("Estimated Employed in India by Region")
plt.show()
In [10]:
# Histogram of the estimated labour participation rate, coloured by region.
participation_fig, participation_ax = plt.subplots(figsize=(7, 6))
participation_ax.set_title("Estimated Labour Participation Rate based on Regions of Indian ")
sns.histplot(
    x="Estimated Labour Participation Rate",
    hue="Region",
    data=data,
    ax=participation_ax,
)
plt.show()
In [11]:
# Average labour participation rate per state, coloured by region.
# With a numeric x and a categorical y, Plotly Express draws horizontal bars and
# aggregates x per state. The default aggregation is a sum, which would add up
# the monthly percentages; histfunc='avg' reports the mean rate per state instead.
fig = px.histogram(
    data,
    x='Estimated Labour Participation Rate',
    y='States',
    color='Region',
    histfunc='avg',
    title='Average Estimated Labour Participation Rate by State',
)
fig.show()
In [12]:
# Distribution of the estimated unemployment rate, split by region.
# The column names are already set in the earlier renaming cell, so the
# copy-pasted reassignment of data.columns was removed.
unemployment_fig, unemployment_ax = plt.subplots(figsize=(7, 6))
unemployment_ax.set_title("Unemployment Rate According to Different Regions of Indian ")
sns.histplot(
    x="Estimated Unemployment Rate",
    hue="Region",
    data=data,
    ax=unemployment_ax,
)
plt.show()
In [13]:
# Mean estimated unemployment rate per state, sorted ascending and drawn as a
# scatter plot with one point (and one colour) per state.
plot_Estimated_Unemployment = data[['Estimated Unemployment Rate', 'States']]
Estimated_Unemployment_Rate = (
    plot_Estimated_Unemployment
    .groupby('States')
    .mean()
    .reset_index()
    .sort_values('Estimated Unemployment Rate')
)
fig = px.scatter(
    Estimated_Unemployment_Rate,
    x='States',
    y='Estimated Unemployment Rate',
    color='States',
    title='Average Estimated Unemployment Rate in each state',
    template='plotly',
)
fig.show()
In [14]:
# Mean estimated number of employed people per state, sorted ascending and
# drawn with one bar (and one colour) per state.
plot_Estimated_Employed = data[['Estimated Employed', 'States']]
Estimated_Employed = (
    plot_Estimated_Employed
    .groupby('States')
    .mean()
    .reset_index()
    .sort_values('Estimated Employed')
)
fig = px.histogram(
    Estimated_Employed,
    x='States',
    y='Estimated Employed',
    color='States',
    title='Average Estimated Employed in each state',
    template='plotly',
)
fig.show()
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: